Load data

# Load the cleaned sidewalk cafe licence/application table. No column types
# are supplied, so readr guesses them and prints the resulting specification.
cafe =
  here::here("data/Sidewalk_Caf__Licenses_and_Applications_clean.csv") %>%
  read_csv()
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   zip = col_double(),
##   swc_sq_ft = col_double(),
##   swc_tables = col_double(),
##   swc_chairs = col_double(),
##   lat = col_double(),
##   long = col_double(),
##   community_district = col_double(),
##   city_council_district = col_double(),
##   app_sq_ft = col_double(),
##   app_tables = col_double(),
##   app_chairs = col_double(),
##   app_status_date = col_datetime(format = ""),
##   expiration_date = col_datetime(format = ""),
##   app_too_date = col_datetime(format = ""),
##   submit_date = col_datetime(format = ""),
##   intake_dd = col_datetime(format = ""),
##   send_package_dd = col_datetime(format = ""),
##   cp_dd = col_datetime(format = ""),
##   cb_dd = col_datetime(format = ""),
##   hearing_dd = col_datetime(format = "")
##   # ... with 4 more columns
## )
## See spec(...) for full column specifications.
# Read the FY2021 parking-violation extract using an explicit compact type
# string (one letter per column: d = double, c = character, l = logical),
# then pass the result through janitor::clean_names() so later chunks can
# refer to columns such as `street_name` and `issue_date`.
# NOTE(review): the parsing-failure report printed for this chunk shows values
# with trailing ",200,626"-style text rejected in columns typed `d`; this looks
# like thousands separators in the file -- confirm whether those columns
# should be parsed as `n` (number) or `c` instead.
parking = janitor::clean_names(
  read_csv(
    here::here("data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv"),
    col_types = "dccccdcccddddcdddcccccccccddclccccddcdcclll"
  )
)
## Warning: 1606926 parsing failures.
## row                 col               expected   actual                                                                                                                       file
## 188 Date First Observed no trailing characters ,200,626 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 274 Date First Observed no trailing characters ,200,619 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 458 Date First Observed no trailing characters ,200,605 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 468 Date First Observed no trailing characters ,200,702 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 590 Law Section         no trailing characters ,111     'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## ... ................... ...................... ........ ..........................................................................................................................
## See problems(...) for more details.

Cafe Map

# Count tickets per street and attach that count to every cafe row.
# The right join keeps all cafes, so cafes on streets without any ticket
# come back with `ticket = NA` and are given a tiny positive placeholder so
# that log(ticket) stays finite when the colour scale is built.
plot_cafe_map =
  parking %>%
  count(street_name, name = "ticket") %>%
  # Name the key explicitly so a column that later happens to be shared by
  # both tables cannot silently become part of the join condition.
  right_join(cafe, by = "street_name") %>%
  # count() returns an integer column; convert to double first because
  # replace_na() casts the replacement to the column's type and 1e-10 is not
  # representable as an integer.
  mutate(ticket = replace_na(as.numeric(ticket), 1e-10))
## Joining, by = "street_name"
# Continuous colour scale over log(ticket count) using the "plasma" palette.
# (Other palettes considered here: "viridis", "magma", "inferno".)
pal = colorNumeric(
  palette = "plasma",
  domain = log(plot_cafe_map$ticket)
)

# Build the interactive map: one small circle per cafe, coloured by the log
# of the ticket count on its street, with an HTML popup showing the business
# name and the rounded ticket count.
plot_cafe_map =
  plot_cafe_map %>%
  mutate(
    pop = str_c("<b>", business_name, "</b><br>", round(ticket), " tickets")
  ) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    color = ~pal(log(ticket)),
    radius = 0.1,
    popup = ~pop
  )

# Render the widget.
plot_cafe_map

Plot violations vs time

convert date and time format

# Derive numeric month and 24-hour clock columns from the raw date/time text.
# Output keeps: month (numeric), day, hour (numeric, 24-hour), min,
# am_pm (numeric offset: 12 for "P", 0 for "A"), summons_number, vehicle_make.
parking_time =
  parking %>%
  select(issue_date, violation_time, summons_number, vehicle_make) %>%
  # issue_date is split on "/" into month / day / year pieces.
  separate(issue_date, into = c("month", "day", "year"), sep = "/") %>%
  # violation_time is split by position: first two characters are the hour,
  # next two the minutes, the remainder is the AM/PM marker.
  separate(violation_time, into = c("hour", "min", "am_pm"), sep = c(2, 4)) %>%
  # Drop rows whose marker is not exactly "A" or "P".
  filter(am_pm %in% c("P", "A")) %>%
  mutate(
    am_pm = recode(am_pm, `P` = 12, `A` = 0),
    hour = as.numeric(hour),
    # On a 12-hour clock, 12 is the start of the half-day: 12:xx AM is hour 0
    # and 12:xx PM is hour 12. Map 12 to 0 before adding the offset; without
    # this, noon became 24 and midnight became 12.
    hour = if_else(hour == 12, 0, hour) + am_pm,
    month = as.numeric(month)
  ) %>%
  select(-year)

Make line plots: violation vs month

# Line chart of the number of violations issued in each calendar month.
parking_time %>%
  # count() gives one row per month with the tally in `n`, already ungrouped.
  count(month) %>%
  # "lines" is the valid plotly scatter mode flag ("line" is not recognised).
  plot_ly(x = ~month, y = ~n, type = "scatter", mode = "lines") %>%
  layout(
    title = "Violations per Month",
    xaxis = list(
      # Treat month numbers as discrete labels rather than a continuous axis.
      type = "category",
      title = "Month"
    ),
    yaxis = list(
      title = "Count of violations"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Line plot: violations vs hour of day

# Line chart of the number of violations issued in each hour of the day.
parking_time %>%
  # count() gives one row per hour with the tally in `n`, already ungrouped.
  count(hour) %>%
  # "lines" is the valid plotly scatter mode flag ("line" is not recognised).
  plot_ly(x = ~hour, y = ~n, type = "scatter", mode = "lines") %>%
  layout(
    title = "Violations per Hour",
    xaxis = list(
      # Treat hour values as discrete labels rather than a continuous axis.
      type = "category",
      title = "Hour",
      range = c(0, 24)
    ),
    yaxis = list(
      title = "Count of violations"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)